import os
import sys
import gzip
import pysam
from Bio import SeqIO

# Name of the library to process, taken from the first command-line argument.
# It is used as the file-name prefix of the FASTQ and BAM files handled below.
library = sys.argv[1]

# Root directory; the input files are looked up in its "Fastq" and "Mapping"
# subdirectories.
directory = "/osc-fs_home/mdehoon/Data/CASPARs/MiSeq/"

def parse_alignments(alignments):
    """Group consecutive mate pairs that share a read name.

    The input is consumed two records at a time: every record is paired with
    the record that immediately follows it, and both must carry the same
    ``qname``.  Runs of adjacent pairs with an identical name are collected
    into one block.

    Args:
        alignments: iterable of alignment records exposing a ``qname``
            attribute (the caller in this file passes a
            ``pysam.AlignmentFile``).

    Yields:
        A list of ``(alignment1, alignment2)`` tuples for each run of
        consecutive pairs with the same name, in input order.

    Raises:
        AssertionError: if the two records of a pair have different names.
        ValueError: if the input holds an odd number of records, so the last
            record has no mate.
    """
    # iter() lets plain iterables (lists, ...) be consumed pairwise as well;
    # calling next() directly on those would raise TypeError.
    records = iter(alignments)
    current = None
    block = []
    for alignment1 in records:
        try:
            alignment2 = next(records)
        except StopIteration:
            # A StopIteration escaping a generator body is turned into an
            # opaque RuntimeError (PEP 479); report the real problem instead.
            raise ValueError(
                "unpaired trailing alignment %r" % alignment1.qname
            ) from None
        name1 = alignment1.qname
        name2 = alignment2.qname
        assert name1 == name2
        if name1 != current:
            # Compare against None explicitly so that a block whose read
            # name is falsy (e.g. an empty string) is not silently dropped.
            if current is not None:
                yield block
            current = name1
            block = []
        block.append((alignment1, alignment2))
    if current is not None:
        yield block
    

def _open_fastq(read_number):
    """Announce and open the gzipped FASTQ file of one mate of the library.

    Args:
        read_number: 1 or 2, selecting ``<library>_READ1.fq.gz`` or
            ``<library>_READ2.fq.gz`` in the "Fastq" subdirectory.

    Returns:
        The file, opened for reading in text mode.
    """
    filename = "%s_READ%d.fq.gz" % (library, read_number)
    path = os.path.join(directory, "Fastq", filename)
    print("Reading", path)
    return gzip.open(path, "rt")


def _has_internal_n(sequence):
    """Return True if *sequence* contains an 'N' other than at its ends."""
    return 'N' in sequence.strip("N")


def _qcfail_segment(name, sequence, first_in_pair):
    """Build an unmapped, QC-failed, paired record carrying *sequence*.

    Args:
        name: query name to store in the record.
        sequence: read sequence; stored as ``str(sequence)``.
        first_in_pair: True to flag the record as read 1, False for read 2.

    Returns:
        The new ``pysam.AlignedSegment``.
    """
    segment = pysam.AlignedSegment()
    segment.query_name = name
    segment.is_unmapped = True
    segment.is_qcfail = True
    if first_in_pair:
        segment.is_read1 = True
    else:
        segment.is_read2 = True
    segment.is_paired = True
    segment.mate_is_unmapped = True
    segment.query_sequence = str(sequence)
    return segment


def _hard_clips_to_insertions(segment):
    """Replace every hard-clip CIGAR operation of *segment* by an insertion.

    Operation lengths and all other operations are left untouched.
    """
    # NOTE(review): presumably needed because the full read sequence is put
    # back into the record, so the formerly clipped bases must be accounted
    # for by an operation that consumes query bases -- confirm.
    segment.cigartuples = [
        (pysam.CINS if operation == pysam.CHARD_CLIP else operation, length)
        for operation, length in segment.cigartuples
    ]


def _write_pair_block(output, record1, record2, block):
    """Write the alignments of one read pair to *output*.

    Args:
        output: writable ``pysam.AlignmentFile``.
        record1: FASTQ record of the first mate.
        record2: FASTQ record of the second mate.
        block: list of ``(alignment1, alignment2)`` tuples for this read
            pair, as produced by ``parse_alignments``.

    Raises:
        AssertionError: if the names of the FASTQ records and the alignments
            disagree, or if the first pair in the block has an unexpected
            combination of mapped/strand flags.
    """
    name1 = record1.id
    name2 = record2.id
    assert name1 == name2
    sequence1 = record1.seq
    sequence2 = record2.seq
    if _has_internal_n(sequence1) or _has_internal_n(sequence2):
        # Pairs with an 'N' inside either read are not taken from the
        # mapping: they are written as unmapped, QC-failed records and their
        # name is reported on standard output.
        output.write(_qcfail_segment(name1, sequence1, True))
        output.write(_qcfail_segment(name2, sequence2, False))
        print(name1)
        return
    for i, (alignment1, alignment2) in enumerate(block):
        assert alignment1.query_name == name1
        assert alignment2.query_name == name2
        if i == 0:
            # The first pair of the block becomes the primary alignment and
            # receives the sequences from the FASTQ files; a mate aligned to
            # the reverse strand gets the reverse complement.
            if alignment1.is_unmapped:
                assert alignment2.is_unmapped
                assert not alignment1.is_reverse
                assert not alignment2.is_reverse
                alignment1.query_sequence = str(sequence1)
                alignment2.query_sequence = str(sequence2)
            elif alignment1.is_reverse:
                assert not alignment2.is_reverse
                assert not alignment1.is_unmapped
                assert not alignment2.is_unmapped
                alignment1.query_sequence = str(sequence1.reverse_complement())
                alignment2.query_sequence = str(sequence2)
            else:
                assert alignment2.is_reverse
                assert not alignment1.is_unmapped
                assert not alignment2.is_unmapped
                alignment1.query_sequence = str(sequence1)
                alignment2.query_sequence = str(sequence2.reverse_complement())
            alignment1.is_secondary = False
            alignment2.is_secondary = False
        else:
            # Every further pair of the same read is flagged as secondary.
            alignment1.is_secondary = True
            alignment2.is_secondary = True
        if not alignment1.is_unmapped:
            _hard_clips_to_insertions(alignment1)
            _hard_clips_to_insertions(alignment2)
        output.write(alignment1)
        output.write(alignment2)


# Walk the two FASTQ files and the name-grouped BAM alignments in lockstep and
# write the updated records to a new BAM file.  The context managers make sure
# that every file, including the input BAM file, is closed even when an
# assertion fails half-way through.
with _open_fastq(1) as stream1, _open_fastq(2) as stream2:
    records1 = SeqIO.parse(stream1, "fastq")
    records2 = SeqIO.parse(stream2, "fastq")

    filename = "%s.bam" % library
    path = os.path.join(directory, "Mapping", filename)
    print("Reading", path)
    with pysam.AlignmentFile(path) as alignments:
        blocks = parse_alignments(alignments)
        # NOTE: the output uses the bare file name, i.e. it is created in the
        # current working directory under the same name as the input file.
        print("Writing", filename)
        with pysam.AlignmentFile(filename, "wb", template=alignments) as output:
            # NOTE: zip() stops at the shortest of the three inputs, so extra
            # FASTQ records or alignment blocks are ignored without an error.
            for record1, record2, block in zip(records1, records2, blocks):
                _write_pair_block(output, record1, record2, block)
